{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from matplotlib.pylab import *\n",
    "from numpy import random\n",
    "from scipy.linalg import hadamard\n",
    "import pdb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def figsize(width,height):\n",
    "    rcParams['figure.figsize'] = (width,height)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#%matplotlib notebook\n",
    "%matplotlib inline\n",
    "\n",
    "figsize(15, 8)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#plot(rand(10))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def _B_pm1(d, rng=None):\n",
    "    if rng is None:\n",
    "        return random.randint(2, size=(d)) * 2 - 1\n",
    "    else:\n",
    "        return rng.randint(2, size=(d)) * 2 - 1\n",
    "\n",
    "def B_pm1(d, rng=None):\n",
    "    return diag(_B_pm1(d, rng=rng))\n",
    "\n",
    "def _G_gauss(d, rng=None):\n",
    "    if rng is None:\n",
    "        return random.normal(0, 1, d)\n",
    "    else:\n",
    "        return rng.normal(0, 1, d)\n",
    "\n",
    "def G_gauss(d, rng=None):\n",
    "    return diag(_G_gauss(d, rng=rng))\n",
    "\n",
    "def _Pi_perm(x, rng=None):\n",
    "    '''Fast perm, apply right away'''\n",
    "    d = x.shape[0]\n",
    "    if rng is None:\n",
    "        return x[random.permutation(d)]\n",
    "    else:\n",
    "        return x[rng.permutation(d)]\n",
    "    \n",
    "def _Pi_perm_order(d, rng=None):\n",
    "    '''Fast perm, return perm order'''\n",
    "    if rng is None:\n",
    "        return random.permutation(d)\n",
    "    else:\n",
    "        return rng.permutation(d)\n",
    "    \n",
    "def Pi_perm(d, rng=None):\n",
    "    '''Slow perm matrix'''\n",
    "    if rng is None:\n",
    "        return eye(d)[random.permutation(d)]\n",
    "    else:\n",
    "        return eye(d)[rng.permutation(d)]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Dense version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "d = 2**9\n",
    "n = 10*d\n",
    "random.seed(1246)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "proj = random.normal(0, 1, (n, d))\n",
    "#proj *= 1. / sqrt((proj**2).sum(1)).reshape(n, 1)\n",
    "#proj /= sqrt((proj**2).sum())\n",
    "proj /= sqrt((proj**2).sum(0))[newaxis,:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "(proj**2).sum(0)[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "x = random.normal(0, 1, (d, 1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "y = dot(proj, x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y[:10]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Check distances"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "delta = random.normal(0, 1, (d, 1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "y_delta = dot(proj, x) - dot(proj, x + delta)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "(delta**2).sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "(y_delta**2).sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "proj = random.normal(0, 1, (n, d))\n",
    "proj /= sqrt((proj**2).sum(0))[newaxis,:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "y = dot(proj, x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sys.getsizeof(proj) / 1e6"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "del proj"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "_=hist(delta, bins=30)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "_=hist(y_delta, bins=30)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Manual version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "d = 8\n",
    "n = 24"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "B = B_pm1(d)\n",
    "H = hadamard(d)\n",
    "Pi = Pi_perm(d)\n",
    "G = G_gauss(d)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "proj = dot(H, dot(G, dot(Pi, dot(H, B))))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Slow version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "class ProjectSlow(object):\n",
    "    def __init__(self, d, n, seed=123):\n",
    "        self.d = d\n",
    "        self.n = n\n",
    "        self.rng = random.RandomState(seed)\n",
    "        \n",
    "        self.B = []\n",
    "        self.H = hadamard(self.d)\n",
    "        self.Pi = []\n",
    "        self.G = []\n",
    "\n",
    "        self.float_replicates = float(self.n)/self.d\n",
    "        self.replicates = int(ceil(self.float_replicates))\n",
    "        \n",
    "        for ii in range(self.replicates):\n",
    "            self.B.append(B_pm1(d, rng=self.rng))\n",
    "            self.Pi.append(Pi_perm(d, rng=self.rng))\n",
    "            self.G.append(G_gauss(d, rng=self.rng))\n",
    "\n",
    "    def project_i(self, x, i):\n",
    "        norm_by = sqrt((self.G[i]**2).sum() * self.d)\n",
    "        ret = dot(self.H, dot(self.G[i], dot(self.Pi[i], dot(self.H, dot(self.B[i], x)))))\n",
    "        ret /= norm_by\n",
    "        ret /= sqrt(self.float_replicates)\n",
    "        return ret\n",
    "    \n",
    "    def project(self, x):\n",
    "        rets = []\n",
    "        for ii in range(self.replicates):\n",
    "            rets.append(self.project_i(x, ii))\n",
    "        return vstack(rets)\n",
    "            \n",
    "            \n",
    "            \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "d = 2**9\n",
    "n = 10*d"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "pp = ProjectSlow(d, n, seed=123)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "random.seed(1234)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "x = random.normal(0, 1, (d, 1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "y = pp.project(x)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Check distances"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "delta = random.normal(0, 1, (d, 1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "y_delta = pp.project(x) - pp.project(x + delta)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "(delta**2).sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "(y_delta**2).sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "pp = ProjectSlow(d, n, seed=123)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "pp.project(x)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Fast version?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "class ProjectFast(object):\n",
    "    def __init__(self, d, n, seed=123):\n",
    "        self.d = d\n",
    "        self.n = n\n",
    "        self.rng = random.RandomState(seed)\n",
    "        \n",
    "        self.B = []\n",
    "        self.H = hadamard(self.d)\n",
    "        self.Pi = []\n",
    "        self.G = []\n",
    "\n",
    "        self.float_replicates = float(self.n)/self.d\n",
    "        self.replicates = int(ceil(self.float_replicates))\n",
    "        \n",
    "        for ii in range(self.replicates):\n",
    "            self.B.append(_B_pm1(d, rng=self.rng)[:,newaxis])\n",
    "            self.Pi.append(_Pi_perm_order(d, rng=self.rng))\n",
    "            self.G.append(_G_gauss(d, rng=self.rng)[:,newaxis])\n",
    "\n",
    "    def project_i(self, x, i):\n",
    "        #print i\n",
    "        #pdb.set_trace()\n",
    "        norm_by = sqrt((self.G[i]**2).sum() * self.d)\n",
    "        #norm_by = sqrt((self.G[i]**2).sum())\n",
    "        ret = self.B[i] * x\n",
    "        ret = dot(self.H, ret)\n",
    "        ret = ret[self.Pi[i]]\n",
    "        ret = self.G[i] * ret\n",
    "        ret = dot(self.H, ret)\n",
    "        ret /= norm_by\n",
    "        ret /= sqrt(self.float_replicates)\n",
    "        return ret\n",
    "    \n",
    "    def project(self, x):\n",
    "        rets = []\n",
    "        for ii in range(self.replicates):\n",
    "            rets.append(self.project_i(x, ii))\n",
    "        return vstack(rets)\n",
    "            \n",
    "            \n",
    "            \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "d = 2**13\n",
    "n = 10*d\n",
    "random.seed(1234)\n",
    "x = random.normal(0, 1, (d, 1))\n",
    "delta = random.normal(0, 1, (d, 1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "pp = ProjectFast(d, n, seed=123)\n",
    "#pp = ProjectSlow(d, n, seed=123)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Check distances"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "y_delta = pp.project(x) - pp.project(x + delta)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "(delta**2).sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "(y_delta**2).sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "pp = ProjectFast(d, n, seed=123)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "pp.project(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "sys.getsizeof(pp.H) / 1e6"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Implement Fast Walsh-Hadamard transform"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def fwht(x):\n",
    "    # x is [d,1] where d is a power of 2\n",
    "    if x.shape[0] == 1:\n",
    "        return x\n",
    "    else:\n",
    "        x_top = x[:int(x.shape[0]/2)]\n",
    "        x_bot = x[int(x.shape[0]/2):]\n",
    "        return vstack([fwht(x_top+x_bot), fwht(x_top-x_bot)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "a = np.arange(8)\n",
    "print(a[0::2])\n",
    "print(a[1::2])\n",
    "print(a[0::2] + a[1::2])\n",
    "print(a[0::2] - a[1::2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def fwht_opt(x):\n",
    "    \"\"\"\n",
    "    Modified version of: https://github.com/dingluo/fwht/blob/master/FWHT.py\n",
    "    Fast Walsh-Hadamard Transform\n",
    "    Based on mex function written by Chengbo Li@Rice Uni for his TVAL3 algorithm.\n",
    "    His code is according to the K.G. Beauchamp's book -- Applications of Walsh and Related Functions.\n",
    "    \"\"\"\n",
    "    x = x.squeeze()\n",
    "    N = x.size\n",
    "    G = int(N/2) # Number of Groups\n",
    "    M = 2 # Number of Members in Each Group\n",
    "\n",
    "    # First stage\n",
    "    y = np.zeros((int(N/2),2))\n",
    "    y[:,0] = x[0::2] + x[1::2]\n",
    "    y[:,1] = x[0::2] - x[1::2]\n",
    "    x = y.copy()\n",
    "    # Second and further stage\n",
    "    for nStage in range(2,int(math.log(N,2))+1):\n",
    "        y = np.zeros((int(G/2),M*2))\n",
    "        y[0:int(G/2),0:M*2:4] = x[0:G:2,0:M:2] + x[1:G:2,0:M:2]\n",
    "        y[0:int(G/2),1:M*2:4] = x[0:G:2,0:M:2] - x[1:G:2,0:M:2]\n",
    "        y[0:int(G/2),2:M*2:4] = x[0:G:2,1:M:2] - x[1:G:2,1:M:2]\n",
    "        y[0:int(G/2),3:M*2:4] = x[0:G:2,1:M:2] + x[1:G:2,1:M:2]\n",
    "        x = y.copy()\n",
    "        G = int(G/2)\n",
    "        M = M*2\n",
    "    x = y[0,:]\n",
    "    x = x.reshape((x.size,1))\n",
    "    return x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "class ProjectFaster(object):\n",
    "    def __init__(self, d, n, seed=123):\n",
    "        self.d = d\n",
    "        self.n = n\n",
    "        self.rng = random.RandomState(seed)\n",
    "        \n",
    "        self.B = []\n",
    "        self.Pi = []\n",
    "        self.G = []\n",
    "\n",
    "        self.float_replicates = float(self.n)/self.d\n",
    "        self.replicates = int(ceil(self.float_replicates))\n",
    "        \n",
    "        for ii in range(self.replicates):\n",
    "            self.B.append(_B_pm1(d, rng=self.rng)[:,newaxis])\n",
    "            self.Pi.append(_Pi_perm_order(d, rng=self.rng))\n",
    "            self.G.append(_G_gauss(d, rng=self.rng)[:,newaxis])\n",
    "\n",
    "    def project_i(self, x, i):\n",
    "        #print i\n",
    "        #pdb.set_trace()\n",
    "        norm_by = sqrt((self.G[i]**2).sum() * self.d)\n",
    "        #norm_by = sqrt((self.G[i]**2).sum())\n",
    "        ret = self.B[i] * x\n",
    "        ret = fwht_opt(ret)\n",
    "        ret = ret[self.Pi[i]]\n",
    "        ret = self.G[i] * ret\n",
    "        ret = fwht_opt(ret)\n",
    "        ret /= norm_by\n",
    "        ret /= sqrt(self.float_replicates)\n",
    "        return ret\n",
    "    \n",
    "    def project(self, x):\n",
    "        rets = []\n",
    "        for ii in range(self.replicates):\n",
    "            rets.append(self.project_i(x, ii))\n",
    "        return vstack(rets)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "d = 2**13\n",
    "n = 10*d\n",
    "random.seed(1234)\n",
    "x = random.normal(0, 1, (d, 1))\n",
    "delta = random.normal(0, 1, (d, 1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "pp = ProjectFaster(d, n, seed=123)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "y_delta = pp.project(x) - pp.project(x + delta)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "(delta**2).sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "(y_delta**2).sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "pp = ProjectFaster(d, n, seed=123)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "pp.project(x)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# Just Hadamard naive vs FFT"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "d = 8\n",
    "n = 8\n",
    "random.seed(1234)\n",
    "x = 1.0 * arange(d) ** 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "H = hadamard(d)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "H"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "hx = dot(H, x)\n",
    "hx"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "plot(x, 'ko-')\n",
    "plot(hx, 'bo-', ms=10)\n",
    "#plot(hx, 'ro-')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "fft(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "absolute(fft(x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def numpy_hadamard(x):\n",
    "    '''Hadamard transform along last dim of x.'''\n",
    "    orig_shape = x.shape\n",
    "    assert len(orig_shape) in (1, 2), 'x should be a vector or matrix'\n",
    "    if len(x.shape) == 1:\n",
    "        x = x.reshape((1,) + x.shape)\n",
    "        \n",
    "    nn = x.shape[-1]\n",
    "    ll = int(round(log(nn) / log(2)))\n",
    "    assert 2**ll == nn, 'x shape last dim must be a power of 2'\n",
    "    expanded_shape = x.shape[0] + tuple([2] * ll)\n",
    "    ret = x.reshape(expanded_shape)\n",
    "\n",
    "    for ii in range(ll):\n",
    "        print ii\n",
    "        ret = \n",
    "    ret = ret.reshape(orig_shape)\n",
    "    return\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "numpy_hadamard(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# Reshape-based Hadamard implementations"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "## numpy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from fast_walsh_hadamard import np_fast_walsh_hadamard"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Wikipedia example: https://en.wikipedia.org/wiki/Hadamard_transform"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "x = np.array([1,0,1,0,0,1,1,0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "H = hadamard(len(x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dot(x, H)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np_fast_walsh_hadamard(x, axis=0, normalize=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fwht(x).flatten()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print 'This one is different'\n",
    "fwht_opt(x).flatten()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "But applying it twice results in the original vector (rescaled), so at least it's consistent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fwht_opt(fwht_opt(x).flatten()).flatten() / 8"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Large random matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "d = 2**10\n",
    "n = 1000\n",
    "random.seed(1234)\n",
    "#x = 1.0 * arange(d) ** 2\n",
    "x = random.normal(0, 1, (n, d))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ret = np_fast_walsh_hadamard(x, axis=1)\n",
    "retret = np_fast_walsh_hadamard(ret, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "((retret - x)**2).sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "_ = np_fast_walsh_hadamard(x, axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "## Tensorflow, Hadamard Transform"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "from fast_walsh_hadamard import tf_fast_walsh_hadamard"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "in_x = tf.placeholder('float32', name='in_x')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "hx = tf_fast_walsh_hadamard(in_x, axis=0, normalize=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "sess = tf.InteractiveSession()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Wikipedia example: https://en.wikipedia.org/wiki/Hadamard_transform"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "x = np.array([1,0,1,0,0,1,1,0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "H = hadamard(len(x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dot(x, H)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "sess.run(hx, feed_dict={in_x: x})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Check of Fastfood transform properties"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "D = 512\n",
    "random.seed(1235)\n",
    "#x = 1.0 * arange(d) ** 2\n",
    "#x = random.normal(0, 1, (n, d))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "B = diag(random.choice((-1, 1), D))\n",
    "H = hadamard(D)\n",
    "G = diag(random.normal(0, 1, D))\n",
    "Pi = Pi_perm(D)\n",
    "S = 12345    # do later"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "V = dot(H, dot(G, dot(Pi, dot(H, B))))\n",
    "norm_fact = sqrt((V**2).sum(1)[0])\n",
    "Vn = V / norm_fact"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if True:\n",
    "    print 'Row norms of V:', (V**2).sum(1)[:10]\n",
    "    print 'Col norms of V:', (V**2).sum(0)[:10]\n",
    "    print 'Row norms of Vn:', (Vn**2).sum(1)[:10]\n",
    "    print 'Col norms of Vn:', (Vn**2).sum(0)[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def norm_angle_hist(mat, fc, rows=True):\n",
    "    assert mat.shape[0] == mat.shape[1], 'square only'\n",
    "    D = mat.shape[0]\n",
    "    if rows:\n",
    "        norms = sqrt((mat**2).sum(1))\n",
    "        mat_norm = mat / norms.reshape((-1, 1))\n",
    "        prods = dot(mat_norm, mat_norm.T)\n",
    "    else:\n",
    "        # Cols\n",
    "        norms = sqrt((mat**2).sum(0))\n",
    "        mat_norm = mat / norms.reshape((1, -1))\n",
    "        prods = dot(mat_norm.T, mat_norm)\n",
    "    angles = arccos(prods[~eye(D, dtype=bool)]) * 180 / pi\n",
    "    subplot(2, 1, 1)\n",
    "    hist(norms)\n",
    "    subplot(2, 1, 2)\n",
    "    hist(angles, bins=20, fc=fc, normed=True)\n",
    "    title('mean %f, std %f' % (angles.mean(), angles.std()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "h_prods = dot(Vn, Vn.T)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "norm_angle_hist(V, (0, 0, 1, .2))\n",
    "norm_angle_hist(V.T ,(1, 0, 0, .2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "bigG = random.normal(0, 1, (D, D))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "norm_angle_hist(bigG, (0, 0, 1, .2))\n",
    "norm_angle_hist(bigG.T ,(1, 0, 0, .2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "How to compute normalization factor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "(V**2).sum(1)[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "(G**2).sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "((H[0]**2).sum() * (G**2).sum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "(D * (G**2).sum())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "## Tensorflow, Complete Fastfood Transform"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "in_x = tf.placeholder('float32', name='in_x')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Project from d to D!"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note: when D is a power of two, normalization is perfect. When it's not, the normalization is approximate. The approximation is better when D is large (D = 1000 seems to produce transformations with jacobian values within 1% of 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "d = 5\n",
    "D = 1024\n",
    "\n",
    "ff_vars, xform = tf_fastfood_transform(in_x, d, D)\n",
    "sess.run(tf.global_variables_initializer())\n",
    "\n",
    "x0 = np.zeros(d)\n",
    "x1 = x0.copy(); x1[0] += 1\n",
    "x2 = x0.copy(); x2[1] += 1\n",
    "x3 = x0.copy(); x3[2] += 1\n",
    "\n",
    "print 'Projecting from %d to %d' % (d, D)\n",
    "print 'Norm offsets when changing the first three components in the reduced space:'\n",
    "print ((sess.run(xform, feed_dict={in_x: x0}) - sess.run(xform, feed_dict={in_x: x1}))**2).sum()\n",
    "print ((sess.run(xform, feed_dict={in_x: x0}) - sess.run(xform, feed_dict={in_x: x2}))**2).sum()\n",
    "print ((sess.run(xform, feed_dict={in_x: x0}) - sess.run(xform, feed_dict={in_x: x3}))**2).sum()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
